import pandas as pd

raw = pd.read_excel('../original/Georgia 2020 RLA Report.xlsx',
                    header=1)

raw = raw[['County','Trump','Biden','Jorgensen']]
raw = pd.melt(raw, var_name='candidate', id_vars = 'County', value_name='audited')
raw = raw.rename(columns={'County':'county'})
raw = raw.loc[~pd.isna(raw.county)]
raw = raw.loc[raw.county != 'TOTALS']

replacements = {'Trump': 'DONALD J TRUMP',
                'Biden': 'JOSEPH R BIDEN',
                'Jorgensen': 'JO JORGENSEN'
                }
for r in replacements:
    raw.loc[raw.candidate == r, 'candidate'] = replacements[r]

official = pd.read_csv(
  '../support/countypres_2000-2020.csv')
official = official.loc[(official.state == 'GEORGIA') &
                        (official.year == 2020)
                        ]
official = official.rename(columns={'county_name': 'county',
                                    'candidatevotes': 'original'})
official = official.groupby(['county','candidate']
                            ).original.sum().reset_index()
official.loc[official.candidate == 'JOSEPH R BIDEN JR',
            'candidate'] = 'JOSEPH R BIDEN'
ga = pd.merge(raw,official,on=['county','candidate'])

ga['state'] = 'GEORGIA'
ga['method'] = 'RLA'
ga['office'] = 'US PRESIDENT'
ga['difference'] = ga.original - ga.audited

ga.to_csv('../ready/GA_cleaned.csv', index=False)
